/***** Lobxxx Translate Finished ******/
/*
 * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
 * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */
/*
 * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * <p>
 *  版权所有1999-2002,2004,2005 Apache软件基金会
 * 
 *  根据Apache许可证第20版("许可证")授权;您不得使用此文件,除非符合许可证您可以在获取许可证的副本
 * 
 *  http：// wwwapacheorg / licenses / LICENSE-20
 * 
 *  除非适用法律要求或书面同意,否则根据许可证分发的软件将按"原样"基础分发,无任何明示或暗示的保证或条件。请参阅许可证管理权限和限制许可证
 * 
 */

package com.sun.org.apache.xerces.internal.impl.xpath.regex;

import java.util.Vector;
import java.util.Hashtable;

/**
 * This class represents a node in parse tree.
 *
 * @xerces.internal
 *
 * <p>
 *  这个类表示解析树中的一个节点
 * 
 *  @xercesinternal
 * 
 * 
 * @version $Id: Token.java,v 1.7 2010/07/27 05:02:34 joehw Exp $
 */
class Token implements java.io.Serializable {

    private static final long serialVersionUID = 8484976002585487481L;

    static final boolean COUNTTOKENS = true;
    static int tokens = 0;

    static final int CHAR = 0;                  // Literal char
    static final int DOT = 11;                  // .
    static final int CONCAT = 1;                // XY
    static final int UNION = 2;                 // X|Y|Z
    static final int CLOSURE = 3;               // X*
    static final int RANGE = 4;                 // [a-zA-Z] etc.
    static final int NRANGE = 5;                // [^a-zA-Z] etc.
    static final int PAREN = 6;                 // (X) or (?:X)
    static final int EMPTY = 7;                 //
    static final int ANCHOR = 8;                // ^ $ \b \B \< \> \A \Z \z
    static final int NONGREEDYCLOSURE = 9;      // *? +?
    static final int STRING = 10;               // strings
    static final int BACKREFERENCE = 12;        // back references
    static final int LOOKAHEAD = 20;            // (?=...)
    static final int NEGATIVELOOKAHEAD = 21;    // (?!...)
    static final int LOOKBEHIND = 22;           // (?<=...)
    static final int NEGATIVELOOKBEHIND = 23;   // (?<!...)
    static final int INDEPENDENT = 24;          // (?>...)
    static final int MODIFIERGROUP = 25;        // (?ims-ims:...)
    static final int CONDITION = 26;            // (?(...)yes|no)

    static final int UTF16_MAX = 0x10ffff;

    final int type;

    static Token token_dot;
    static Token token_0to9;
    static Token token_wordchars;
    static Token token_not_0to9;
    static Token token_not_wordchars;
    static Token token_spaces;
    static Token token_not_spaces;
    static Token token_empty;
    static Token token_linebeginning;
    static Token token_linebeginning2;
    static Token token_lineend;
    static Token token_stringbeginning;
    static Token token_stringend;
    static Token token_stringend2;
    static Token token_wordedge;
    static Token token_not_wordedge;
    static Token token_wordbeginning;
    static Token token_wordend;
    static {
        Token.token_empty = new Token(Token.EMPTY);

        Token.token_linebeginning = Token.createAnchor('^');
        Token.token_linebeginning2 = Token.createAnchor('@');
        Token.token_lineend = Token.createAnchor('$');
        Token.token_stringbeginning = Token.createAnchor('A');
        Token.token_stringend = Token.createAnchor('z');
        Token.token_stringend2 = Token.createAnchor('Z');
        Token.token_wordedge = Token.createAnchor('b');
        Token.token_not_wordedge = Token.createAnchor('B');
        Token.token_wordbeginning = Token.createAnchor('<');
        Token.token_wordend = Token.createAnchor('>');

        Token.token_dot = new Token(Token.DOT);

        Token.token_0to9 = Token.createRange();
        Token.token_0to9.addRange('0', '9');
        Token.token_wordchars = Token.createRange();
        Token.token_wordchars.addRange('0', '9');
        Token.token_wordchars.addRange('A', 'Z');
        Token.token_wordchars.addRange('_', '_');
        Token.token_wordchars.addRange('a', 'z');
        Token.token_spaces = Token.createRange();
        Token.token_spaces.addRange('\t', '\t');
        Token.token_spaces.addRange('\n', '\n');
        Token.token_spaces.addRange('\f', '\f');
        Token.token_spaces.addRange('\r', '\r');
        Token.token_spaces.addRange(' ', ' ');

        Token.token_not_0to9 = Token.complementRanges(Token.token_0to9);
        Token.token_not_wordchars = Token.complementRanges(Token.token_wordchars);
        Token.token_not_spaces = Token.complementRanges(Token.token_spaces);
    }

    static Token.ParenToken createLook(int type, Token child) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.ParenToken(type, child, 0);
    }
    static Token.ParenToken createParen(Token child, int pnumber) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.ParenToken(Token.PAREN, child, pnumber);
    }
    static Token.ClosureToken createClosure(Token tok) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.ClosureToken(Token.CLOSURE, tok);
    }
    static Token.ClosureToken createNGClosure(Token tok) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.ClosureToken(Token.NONGREEDYCLOSURE, tok);
    }
    static Token.ConcatToken createConcat(Token tok1, Token tok2) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.ConcatToken(tok1, tok2);
    }
    static Token.UnionToken createConcat() {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.UnionToken(Token.CONCAT); // *** It is not a bug.
    }
    static Token.UnionToken createUnion() {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.UnionToken(Token.UNION);
    }
    static Token createEmpty() {
        return Token.token_empty;
    }
    static RangeToken createRange() {
        if (COUNTTOKENS)  Token.tokens ++;
        return new RangeToken(Token.RANGE);
    }
    static RangeToken createNRange() {
        if (COUNTTOKENS)  Token.tokens ++;
        return new RangeToken(Token.NRANGE);
    }
    static Token.CharToken createChar(int ch) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.CharToken(Token.CHAR, ch);
    }
    static private Token.CharToken createAnchor(int ch) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.CharToken(Token.ANCHOR, ch);
    }
    static Token.StringToken createBackReference(int refno) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.StringToken(Token.BACKREFERENCE, null, refno);
    }
    static Token.StringToken createString(String str) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.StringToken(Token.STRING, str, 0);
    }
    static Token.ModifierToken createModifierGroup(Token child, int add, int mask) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.ModifierToken(child, add, mask);
    }
    static Token.ConditionToken createCondition(int refno, Token condition,
                                                Token yespat, Token nopat) {
        if (COUNTTOKENS)  Token.tokens ++;
        return new Token.ConditionToken(refno, condition, yespat, nopat);
    }

    protected Token(int type) {
        this.type = type;
    }

    /**
     * A number of children.
     * <p>
     * 一些孩子
     * 
     */
    int size() {
        return 0;
    }
    Token getChild(int index) {
        return null;
    }
    void addChild(Token tok) {
        throw new RuntimeException("Not supported.");
    }

                                                // for RANGE or NRANGE
    protected void addRange(int start, int end) {
        throw new RuntimeException("Not supported.");
    }
    protected void sortRanges() {
        throw new RuntimeException("Not supported.");
    }
    protected void compactRanges() {
        throw new RuntimeException("Not supported.");
    }
    protected void mergeRanges(Token tok) {
        throw new RuntimeException("Not supported.");
    }
    protected void subtractRanges(Token tok) {
        throw new RuntimeException("Not supported.");
    }
    protected void intersectRanges(Token tok) {
        throw new RuntimeException("Not supported.");
    }
    static Token complementRanges(Token tok) {
        return RangeToken.complementRanges(tok);
    }


    void setMin(int min) {                      // for CLOSURE
    }
    void setMax(int max) {                      // for CLOSURE
    }
    int getMin() {                              // for CLOSURE
        return -1;
    }
    int getMax() {                              // for CLOSURE
        return -1;
    }
    int getReferenceNumber() {                  // for STRING
        return 0;
    }
    String getString() {                        // for STRING
        return null;
    }

    int getParenNumber() {
        return 0;
    }
    int getChar() {
        return -1;
    }

    public String toString() {
        return this.toString(0);
    }
    public String toString(int options) {
        return this.type == Token.DOT ? "." : "";
    }

    /**
     * How many characters are needed?
     * <p>
     *  需要多少个字符?
     * 
     */
    final int getMinLength() {
        switch (this.type) {
          case CONCAT:
            int sum = 0;
            for (int i = 0;  i < this.size();  i ++)
                sum += this.getChild(i).getMinLength();
            return sum;

          case CONDITION:
          case UNION:
            if (this.size() == 0)
                return 0;
            int ret = this.getChild(0).getMinLength();
            for (int i = 1;  i < this.size();  i ++) {
                int min = this.getChild(i).getMinLength();
                if (min < ret)  ret = min;
            }
            return ret;

          case CLOSURE:
          case NONGREEDYCLOSURE:
            if (this.getMin() >= 0)
                return this.getMin() * this.getChild(0).getMinLength();
            return 0;

          case EMPTY:
          case ANCHOR:
            return 0;

          case DOT:
          case CHAR:
          case RANGE:
          case NRANGE:
            return 1;

          case INDEPENDENT:
          case PAREN:
          case MODIFIERGROUP:
            return this.getChild(0).getMinLength();

          case BACKREFERENCE:
            return 0;                           // *******

          case STRING:
            return this.getString().length();

          case LOOKAHEAD:
          case NEGATIVELOOKAHEAD:
          case LOOKBEHIND:
          case NEGATIVELOOKBEHIND:
            return 0;                           // ***** Really?

          default:
            throw new RuntimeException("Token#getMinLength(): Invalid Type: "+this.type);
        }
    }

    final int getMaxLength() {
        switch (this.type) {
          case CONCAT:
            int sum = 0;
            for (int i = 0;  i < this.size();  i ++) {
                int d = this.getChild(i).getMaxLength();
                if (d < 0)  return -1;
                sum += d;
            }
            return sum;

          case CONDITION:
          case UNION:
            if (this.size() == 0)
                return 0;
            int ret = this.getChild(0).getMaxLength();
            for (int i = 1;  ret >= 0 && i < this.size();  i ++) {
                int max = this.getChild(i).getMaxLength();
                if (max < 0) {                  // infinity
                    ret = -1;
                    break;
                }
                if (max > ret)  ret = max;
            }
            return ret;

          case CLOSURE:
          case NONGREEDYCLOSURE:
            if (this.getMax() >= 0)
                                                // When this.child.getMaxLength() < 0,
                                                // this returns minus value
                return this.getMax() * this.getChild(0).getMaxLength();
            return -1;

          case EMPTY:
          case ANCHOR:
            return 0;

          case CHAR:
            return 1;
          case DOT:
          case RANGE:
          case NRANGE:
            return 2;

          case INDEPENDENT:
          case PAREN:
          case MODIFIERGROUP:
            return this.getChild(0).getMaxLength();

          case BACKREFERENCE:
            return -1;                          // ******

          case STRING:
            return this.getString().length();

          case LOOKAHEAD:
          case NEGATIVELOOKAHEAD:
          case LOOKBEHIND:
          case NEGATIVELOOKBEHIND:
            return 0;                           // ***** Really?

          default:
            throw new RuntimeException("Token#getMaxLength(): Invalid Type: "+this.type);
        }
    }

    static final int FC_CONTINUE = 0;
    static final int FC_TERMINAL = 1;
    static final int FC_ANY = 2;
    private static final boolean isSet(int options, int flag) {
        return (options & flag) == flag;
    }
    final int analyzeFirstCharacter(RangeToken result, int options) {
        switch (this.type) {
          case CONCAT:
            int ret = FC_CONTINUE;
            for (int i = 0;  i < this.size();  i ++)
                if ((ret = this.getChild(i).analyzeFirstCharacter(result, options)) != FC_CONTINUE)
                    break;
            return ret;

          case UNION:
            if (this.size() == 0)
                return FC_CONTINUE;
            /*
             *  a|b|c -> FC_TERMINAL
             *  a|.|c -> FC_ANY
             *  a|b|  -> FC_CONTINUE
             * <p>
             *  a | b | c-> FC_TERMINAL a || c-> FC_ANY a | b | - > FC_CONTINUE
             * 
             */
            int ret2 = FC_CONTINUE;
            boolean hasEmpty = false;
            for (int i = 0;  i < this.size();  i ++) {
                ret2 = this.getChild(i).analyzeFirstCharacter(result, options);
                if (ret2 == FC_ANY)
                    break;
                else if (ret2 == FC_CONTINUE)
                    hasEmpty = true;
            }
            return hasEmpty ? FC_CONTINUE : ret2;

          case CONDITION:
            int ret3 = this.getChild(0).analyzeFirstCharacter(result, options);
            if (this.size() == 1)  return FC_CONTINUE;
            if (ret3 == FC_ANY)  return ret3;
            int ret4 = this.getChild(1).analyzeFirstCharacter(result, options);
            if (ret4 == FC_ANY)  return ret4;
            return ret3 == FC_CONTINUE || ret4 == FC_CONTINUE ? FC_CONTINUE : FC_TERMINAL;

          case CLOSURE:
          case NONGREEDYCLOSURE:
            this.getChild(0).analyzeFirstCharacter(result, options);
            return FC_CONTINUE;

          case EMPTY:
          case ANCHOR:
            return FC_CONTINUE;

          case CHAR:
            int ch = this.getChar();
            result.addRange(ch, ch);
            if (ch < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
                ch = Character.toUpperCase((char)ch);
                result.addRange(ch, ch);
                ch = Character.toLowerCase((char)ch);
                result.addRange(ch, ch);
            }
            return FC_TERMINAL;

          case DOT:
              return FC_ANY;

          case RANGE:
            result.mergeRanges(this);
            return FC_TERMINAL;

          case NRANGE:                          // ****
            result.mergeRanges(Token.complementRanges(this));
            return FC_TERMINAL;

          case INDEPENDENT:
          case PAREN:
            return this.getChild(0).analyzeFirstCharacter(result, options);

          case MODIFIERGROUP:
            options |= ((ModifierToken)this).getOptions();
            options &= ~((ModifierToken)this).getOptionsMask();
            return this.getChild(0).analyzeFirstCharacter(result, options);

          case BACKREFERENCE:
            result.addRange(0, UTF16_MAX);  // **** We can not optimize.
            return FC_ANY;

          case STRING:
            int cha = this.getString().charAt(0);
            int ch2;
            if (REUtil.isHighSurrogate(cha)
                && this.getString().length() >= 2
                && REUtil.isLowSurrogate((ch2 = this.getString().charAt(1))))
                cha = REUtil.composeFromSurrogates(cha, ch2);
            result.addRange(cha, cha);
            if (cha < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
                cha = Character.toUpperCase((char)cha);
                result.addRange(cha, cha);
                cha = Character.toLowerCase((char)cha);
                result.addRange(cha, cha);
            }
            return FC_TERMINAL;

          case LOOKAHEAD:
          case NEGATIVELOOKAHEAD:
          case LOOKBEHIND:
          case NEGATIVELOOKBEHIND:
            return FC_CONTINUE;

          default:
            throw new RuntimeException("Token#analyzeHeadCharacter(): Invalid Type: "+this.type);
        }
    }

    private final boolean isShorterThan(Token tok) {
        if (tok == null)  return false;
        /*
        int mylength;
        if (this.type == STRING)  mylength = this.getString().length();
        else if (this.type == CHAR)  mylength = this.getChar() >= 0x10000 ? 2 : 1;
        else throw new RuntimeException("Internal Error: Illegal type: "+this.type);
        int otherlength;
        if (tok.type == STRING)  otherlength = tok.getString().length();
        else if (tok.type == CHAR)  otherlength = tok.getChar() >= 0x10000 ? 2 : 1;
        else throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
        /* <p>
        /*  int mylength; if(thistype == STRING)mylength = thisgetString()length(); else if(thistype == CHAR)myl
        /* ength = thisgetChar()> = 0x10000? 2：1; else throw new RuntimeException("Internal Error：Illegal type："
        /* + thistype); int otherlength; if(toktype == STRING)otherlength = tokgetString()length(); else if(tokt
        /* ype == CHAR)otherlength = tokgetChar()> = 0x10000? 2：1; else throw new RuntimeException("Internal Err
        /* or：Illegal type："+ toktype);。
        /* 
        */
        int mylength;
        if (this.type == STRING)  mylength = this.getString().length();
        else throw new RuntimeException("Internal Error: Illegal type: "+this.type);
        int otherlength;
        if (tok.type == STRING)  otherlength = tok.getString().length();
        else throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
        return mylength < otherlength;
    }

    static class FixedStringContainer {
        Token token = null;
        int options = 0;
        FixedStringContainer() {
        }
    }

    final void findFixedString(FixedStringContainer container, int options) {
        switch (this.type) {
          case CONCAT:
            Token prevToken = null;
            int prevOptions = 0;
            for (int i = 0;  i < this.size();  i ++) {
                this.getChild(i).findFixedString(container, options);
                if (prevToken == null || prevToken.isShorterThan(container.token)) {
                    prevToken = container.token;
                    prevOptions = container.options;
                }
            }
            container.token = prevToken;
            container.options = prevOptions;
            return;

          case UNION:
          case CLOSURE:
          case NONGREEDYCLOSURE:
          case EMPTY:
          case ANCHOR:
          case RANGE:
          case DOT:
          case NRANGE:
          case BACKREFERENCE:
          case LOOKAHEAD:
          case NEGATIVELOOKAHEAD:
          case LOOKBEHIND:
          case NEGATIVELOOKBEHIND:
          case CONDITION:
            container.token = null;
            return;

          case CHAR:                            // Ignore CHAR tokens.
            container.token = null;             // **
            return;                             // **

          case STRING:
            container.token = this;
            container.options = options;
            return;

          case INDEPENDENT:
          case PAREN:
            this.getChild(0).findFixedString(container, options);
            return;

          case MODIFIERGROUP:
            options |= ((ModifierToken)this).getOptions();
            options &= ~((ModifierToken)this).getOptionsMask();
            this.getChild(0).findFixedString(container, options);
            return;

          default:
            throw new RuntimeException("Token#findFixedString(): Invalid Type: "+this.type);
        }
    }

    boolean match(int ch) {
        throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
    }

    // ------------------------------------------------------
    private final static Hashtable categories = new Hashtable();
    private final static Hashtable categories2 = new Hashtable();
    private static final String[] categoryNames = {
        "Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
        "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
        "Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So", // 28
        "Pi", "Pf",  // 29, 30
        "L", "M", "N", "Z", "C", "P", "S",      // 31-37
    };

    // Schema Rec. {Datatypes} - Punctuation
    static final int CHAR_INIT_QUOTE  = 29;     // Pi - initial quote
    static final int CHAR_FINAL_QUOTE = 30;     // Pf - final quote
    static final int CHAR_LETTER = 31;
    static final int CHAR_MARK = 32;
    static final int CHAR_NUMBER = 33;
    static final int CHAR_SEPARATOR = 34;
    static final int CHAR_OTHER = 35;
    static final int CHAR_PUNCTUATION = 36;
    static final int CHAR_SYMBOL = 37;

    //blockNames in UNICODE 3.1 that supported by XML Schema REC
    private static final String[] blockNames = {
        /*0000..007F;*/ "Basic Latin",
        /* <p>
        /* 
        /*0080..00FF;*/ "Latin-1 Supplement",
        /* <p>
        /* 
        /*0100..017F;*/ "Latin Extended-A",
        /* <p>
        /* 
        /*0180..024F;*/ "Latin Extended-B",
        /* <p>
        /* 
        /*0250..02AF;*/ "IPA Extensions",
        /* <p>
        /* 
        /*02B0..02FF;*/ "Spacing Modifier Letters",
        /* <p>
        /* 
        /*0300..036F;*/ "Combining Diacritical Marks",
        /* <p>
        /* 
        /*0370..03FF;*/ "Greek",
        /* <p>
        /* 
        /*0400..04FF;*/ "Cyrillic",
        /* <p>
        /* 
        /*0530..058F;*/ "Armenian",
        /* <p>
        /* 
        /*0590..05FF;*/ "Hebrew",
        /* <p>
        /* 
        /*0600..06FF;*/ "Arabic",
        /* <p>
        /* 
        /*0700..074F;*/ "Syriac",
        /* <p>
        /* 
        /*0780..07BF;*/ "Thaana",
        /* <p>
        /* 
        /*0900..097F;*/ "Devanagari",
        /* <p>
        /* 
        /*0980..09FF;*/ "Bengali",
        /* <p>
        /* 
        /*0A00..0A7F;*/ "Gurmukhi",
        /* <p>
        /* 
        /*0A80..0AFF;*/ "Gujarati",
        /* <p>
        /* 
        /*0B00..0B7F;*/ "Oriya",
        /* <p>
        /* 
        /*0B80..0BFF;*/ "Tamil",
        /* <p>
        /* 
        /*0C00..0C7F;*/ "Telugu",
        /* <p>
        /* 
        /*0C80..0CFF;*/ "Kannada",
        /* <p>
        /* 
        /*0D00..0D7F;*/ "Malayalam",
        /* <p>
        /* 
        /*0D80..0DFF;*/ "Sinhala",
        /* <p>
        /* 
        /*0E00..0E7F;*/ "Thai",
        /* <p>
        /* 
        /*0E80..0EFF;*/ "Lao",
        /* <p>
        /* 
        /*0F00..0FFF;*/ "Tibetan",
        /* <p>
        /* 
        /*1000..109F;*/ "Myanmar",
        /* <p>
        /* 
        /*10A0..10FF;*/ "Georgian",
        /* <p>
        /* 
        /*1100..11FF;*/ "Hangul Jamo",
        /* <p>
        /* 
        /*1200..137F;*/ "Ethiopic",
        /* <p>
        /* 
        /*13A0..13FF;*/ "Cherokee",
        /* <p>
        /* 
        /*1400..167F;*/ "Unified Canadian Aboriginal Syllabics",
        /* <p>
        /* 
        /*1680..169F;*/ "Ogham",
        /* <p>
        /* 
        /*16A0..16FF;*/ "Runic",
        /* <p>
        /* 
        /*1780..17FF;*/ "Khmer",
        /* <p>
        /* 
        /*1800..18AF;*/ "Mongolian",
        /* <p>
        /* 
        /*1E00..1EFF;*/ "Latin Extended Additional",
        /* <p>
        /* 
        /*1F00..1FFF;*/ "Greek Extended",
        /* <p>
        /* 
        /*2000..206F;*/ "General Punctuation",
        /* <p>
        /* 
        /*2070..209F;*/ "Superscripts and Subscripts",
        /* <p>
        /* 
        /*20A0..20CF;*/ "Currency Symbols",
        /* <p>
        /* 
        /*20D0..20FF;*/ "Combining Marks for Symbols",
        /* <p>
        /* 
        /*2100..214F;*/ "Letterlike Symbols",
        /* <p>
        /* 
        /*2150..218F;*/ "Number Forms",
        /* <p>
        /* 
        /*2190..21FF;*/ "Arrows",
        /* <p>
        /* 
        /*2200..22FF;*/ "Mathematical Operators",
        /* <p>
        /* 
        /*2300..23FF;*/ "Miscellaneous Technical",
        /* <p>
        /* 
        /*2400..243F;*/ "Control Pictures",
        /* <p>
        /* 
        /*2440..245F;*/ "Optical Character Recognition",
        /* <p>
        /* 
        /*2460..24FF;*/ "Enclosed Alphanumerics",
        /* <p>
        /* 
        /*2500..257F;*/ "Box Drawing",
        /* <p>
        /* 
        /*2580..259F;*/ "Block Elements",
        /* <p>
        /* 
        /*25A0..25FF;*/ "Geometric Shapes",
        /* <p>
        /* 
        /*2600..26FF;*/ "Miscellaneous Symbols",
        /* <p>
        /* 
        /*2700..27BF;*/ "Dingbats",
        /* <p>
        /* 
        /*2800..28FF;*/ "Braille Patterns",
        /* <p>
        /* 
        /*2E80..2EFF;*/ "CJK Radicals Supplement",
        /* <p>
        /* 
        /*2F00..2FDF;*/ "Kangxi Radicals",
        /* <p>
        /* 
        /*2FF0..2FFF;*/ "Ideographic Description Characters",
        /* <p>
        /* 
        /*3000..303F;*/ "CJK Symbols and Punctuation",
        /* <p>
        /* 
        /*3040..309F;*/ "Hiragana",
        /* <p>
        /* 
        /*30A0..30FF;*/ "Katakana",
        /* <p>
        /* 
        /*3100..312F;*/ "Bopomofo",
        /* <p>
        /* 
        /*3130..318F;*/ "Hangul Compatibility Jamo",
        /* <p>
        /* 
        /*3190..319F;*/ "Kanbun",
        /* <p>
        /* 
        /*31A0..31BF;*/ "Bopomofo Extended",
        /* <p>
        /* 
        /*3200..32FF;*/ "Enclosed CJK Letters and Months",
        /* <p>
        /* 
        /*3300..33FF;*/ "CJK Compatibility",
        /* <p>
        /* 
        /*3400..4DB5;*/ "CJK Unified Ideographs Extension A",
        /* <p>
        /* 
        /*4E00..9FFF;*/ "CJK Unified Ideographs",
        /* <p>
        /* 
        /*A000..A48F;*/ "Yi Syllables",
        /* <p>
        /* 
        /*A490..A4CF;*/ "Yi Radicals",
        /* <p>
        /* 
        /*AC00..D7A3;*/ "Hangul Syllables",
        /* <p>
        /* 
        /*E000..F8FF;*/ "Private Use",
        /* <p>
        /* 
        /*F900..FAFF;*/ "CJK Compatibility Ideographs",
        /* <p>
        /* 
        /*FB00..FB4F;*/ "Alphabetic Presentation Forms",
        /* <p>
        /* 
        /*FB50..FDFF;*/ "Arabic Presentation Forms-A",
        /* <p>
        /* 
        /*FE20..FE2F;*/ "Combining Half Marks",
        /* <p>
        /* 
        /*FE30..FE4F;*/ "CJK Compatibility Forms",
        /* <p>
        /* 
        /*FE50..FE6F;*/ "Small Form Variants",
        /* <p>
        /* 
        /*FE70..FEFE;*/ "Arabic Presentation Forms-B",
        /* <p>
        /* 
        /*FEFF..FEFF;*/ "Specials",
        /* <p>
        /* 
        /*FF00..FFEF;*/ "Halfwidth and Fullwidth Forms",
         //missing Specials add manually
        /* <p>
        /*  // missing手动添加Specials
        /* 
        /* 
        /*10300..1032F;*/ "Old Italic",         // 84
        /* <p>
        /* 
        /*10330..1034F;*/ "Gothic",
        /* <p>
        /* 
        /*10400..1044F;*/ "Deseret",
        /* <p>
        /* 
        /*1D000..1D0FF;*/ "Byzantine Musical Symbols",
        /* <p>
        /* 
        /*1D100..1D1FF;*/ "Musical Symbols",
        /* <p>
        /* 
        /*1D400..1D7FF;*/ "Mathematical Alphanumeric Symbols",
        /* <p>
        /* 
        /*20000..2A6D6;*/ "CJK Unified Ideographs Extension B",
        /* <p>
        /* 
        /*2F800..2FA1F;*/ "CJK Compatibility Ideographs Supplement",
        /* <p>
        /* 
        /*E0000..E007F;*/ "Tags",
        //missing 2 private use add manually

    };
    //ADD THOSE MANUALLY
    //F0000..FFFFD; "Private Use",
    //100000..10FFFD; "Private Use"
    //FFF0..FFFD; "Specials",
    static final String blockRanges =
       "\u0000\u007F\u0080\u00FF\u0100\u017F\u0180\u024F\u0250\u02AF\u02B0\u02FF\u0300\u036F"
        +"\u0370\u03FF\u0400\u04FF\u0530\u058F\u0590\u05FF\u0600\u06FF\u0700\u074F\u0780\u07BF"
        +"\u0900\u097F\u0980\u09FF\u0A00\u0A7F\u0A80\u0AFF\u0B00\u0B7F\u0B80\u0BFF\u0C00\u0C7F\u0C80\u0CFF"
        +"\u0D00\u0D7F\u0D80\u0DFF\u0E00\u0E7F\u0E80\u0EFF\u0F00\u0FFF\u1000\u109F\u10A0\u10FF\u1100\u11FF"
        +"\u1200\u137F\u13A0\u13FF\u1400\u167F\u1680\u169F\u16A0\u16FF\u1780\u17FF\u1800\u18AF\u1E00\u1EFF"
        +"\u1F00\u1FFF\u2000\u206F\u2070\u209F\u20A0\u20CF\u20D0\u20FF\u2100\u214F\u2150\u218F\u2190\u21FF\u2200\u22FF"
        +"\u2300\u23FF\u2400\u243F\u2440\u245F\u2460\u24FF\u2500\u257F\u2580\u259F\u25A0\u25FF\u2600\u26FF\u2700\u27BF"
        +"\u2800\u28FF\u2E80\u2EFF\u2F00\u2FDF\u2FF0\u2FFF\u3000\u303F\u3040\u309F\u30A0\u30FF\u3100\u312F\u3130\u318F"
        +"\u3190\u319F\u31A0\u31BF\u3200\u32FF\u3300\u33FF\u3400\u4DB5\u4E00\u9FFF\uA000\uA48F\uA490\uA4CF"
        +"\uAC00\uD7A3\uE000\uF8FF\uF900\uFAFF\uFB00\uFB4F\uFB50\uFDFF"
        +"\uFE20\uFE2F\uFE30\uFE4F\uFE50\uFE6F\uFE70\uFEFE\uFEFF\uFEFF\uFF00\uFFEF";
    static final int[] nonBMPBlockRanges = {
        0x10300, 0x1032F,       // 84
        0x10330, 0x1034F,
        0x10400, 0x1044F,
        0x1D000, 0x1D0FF,
        0x1D100, 0x1D1FF,
        0x1D400, 0x1D7FF,
        0x20000, 0x2A6D6,
        0x2F800, 0x2FA1F,
        0xE0000, 0xE007F
    };
    private static final int NONBMP_BLOCK_START = 84;

    static protected RangeToken getRange(String name, boolean positive) {
        if (Token.categories.size() == 0) {
            synchronized (Token.categories) {
                Token[] ranges = new Token[Token.categoryNames.length];
                for (int i = 0;  i < ranges.length;  i ++) {
                    ranges[i] = Token.createRange();
                }
                int type;
                for (int i = 0;  i < 0x10000;  i ++) {
                    type = Character.getType((char)i);
                    if (type == Character.START_PUNCTUATION ||
                        type == Character.END_PUNCTUATION) {
                        //build table of Pi values
                        if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C ||
                            i == 0x201F || i == 0x2039) {
                            type = CHAR_INIT_QUOTE;
                        }
                        //build table of Pf values
                        if (i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A ) {
                            type = CHAR_FINAL_QUOTE;
                        }
                    }
                    ranges[type].addRange(i, i);
                    switch (type) {
                      case Character.UPPERCASE_LETTER:
                      case Character.LOWERCASE_LETTER:
                      case Character.TITLECASE_LETTER:
                      case Character.MODIFIER_LETTER:
                      case Character.OTHER_LETTER:
                        type = CHAR_LETTER;
                        break;
                      case Character.NON_SPACING_MARK:
                      case Character.COMBINING_SPACING_MARK:
                      case Character.ENCLOSING_MARK:
                        type = CHAR_MARK;
                        break;
                      case Character.DECIMAL_DIGIT_NUMBER:
                      case Character.LETTER_NUMBER:
                      case Character.OTHER_NUMBER:
                        type = CHAR_NUMBER;
                        break;
                      case Character.SPACE_SEPARATOR:
                      case Character.LINE_SEPARATOR:
                      case Character.PARAGRAPH_SEPARATOR:
                        type = CHAR_SEPARATOR;
                        break;
                      case Character.CONTROL:
                      case Character.FORMAT:
                      case Character.SURROGATE:
                      case Character.PRIVATE_USE:
                      case Character.UNASSIGNED:
                        type = CHAR_OTHER;
                        break;
                      case Character.CONNECTOR_PUNCTUATION:
                      case Character.DASH_PUNCTUATION:
                      case Character.START_PUNCTUATION:
                      case Character.END_PUNCTUATION:
                      case CHAR_INIT_QUOTE:
                      case CHAR_FINAL_QUOTE:
                      case Character.OTHER_PUNCTUATION:
                        type = CHAR_PUNCTUATION;
                        break;
                      case Character.MATH_SYMBOL:
                      case Character.CURRENCY_SYMBOL:
                      case Character.MODIFIER_SYMBOL:
                      case Character.OTHER_SYMBOL:
                        type = CHAR_SYMBOL;
                        break;
                      default:
                        throw new RuntimeException("org.apache.xerces.utils.regex.Token#getRange(): Unknown Unicode category: "+type);
                    }
                    ranges[type].addRange(i, i);
                } // for all characters
                ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);

                for (int i = 0;  i < ranges.length;  i ++) {
                    if (Token.categoryNames[i] != null) {
                        if (i == Character.UNASSIGNED) { // Unassigned
                            ranges[i].addRange(0x10000, Token.UTF16_MAX);
                        }
                        Token.categories.put(Token.categoryNames[i], ranges[i]);
                        Token.categories2.put(Token.categoryNames[i],
                                              Token.complementRanges(ranges[i]));
                    }
                }
                //REVISIT: do we really need to support block names as in Unicode 3.1
                //         or we can just create all the names in IsBLOCKNAME format (XML Schema REC)?
                //
                StringBuffer buffer = new StringBuffer(50);
                for (int i = 0;  i < Token.blockNames.length;  i ++) {
                    Token r1 = Token.createRange();
                    int location;
                    if (i < NONBMP_BLOCK_START) {
                        location = i*2;
                        int rstart = Token.blockRanges.charAt(location);
                        int rend = Token.blockRanges.charAt(location+1);
                        //DEBUGING
                        //System.out.println(n+" " +Integer.toHexString(rstart)
                        //                     +"-"+ Integer.toHexString(rend));
                        r1.addRange(rstart, rend);
                    } else {
                        location = (i - NONBMP_BLOCK_START) * 2;
                        r1.addRange(Token.nonBMPBlockRanges[location],
                                    Token.nonBMPBlockRanges[location + 1]);
                    }
                    String n = Token.blockNames[i];
                    if (n.equals("Specials"))
                        r1.addRange(0xfff0, 0xfffd);
                    if (n.equals("Private Use")) {
                        r1.addRange(0xF0000,0xFFFFD);
                        r1.addRange(0x100000,0x10FFFD);
                    }
                    Token.categories.put(n, r1);
                    Token.categories2.put(n, Token.complementRanges(r1));
                    buffer.setLength(0);
                    buffer.append("Is");
                    if (n.indexOf(' ') >= 0) {
                        for (int ci = 0;  ci < n.length();  ci ++)
                            if (n.charAt(ci) != ' ')  buffer.append((char)n.charAt(ci));
                    }
                    else {
                        buffer.append(n);
                    }
                    Token.setAlias(buffer.toString(), n, true);
                }

                // TR#18 1.2
                Token.setAlias("ASSIGNED", "Cn", false);
                Token.setAlias("UNASSIGNED", "Cn", true);
                Token all = Token.createRange();
                all.addRange(0, Token.UTF16_MAX);
                Token.categories.put("ALL", all);
                Token.categories2.put("ALL", Token.complementRanges(all));
                Token.registerNonXS("ASSIGNED");
                Token.registerNonXS("UNASSIGNED");
                Token.registerNonXS("ALL");

                Token isalpha = Token.createRange();
                isalpha.mergeRanges(ranges[Character.UPPERCASE_LETTER]); // Lu
                isalpha.mergeRanges(ranges[Character.LOWERCASE_LETTER]); // Ll
                isalpha.mergeRanges(ranges[Character.OTHER_LETTER]); // Lo
                Token.categories.put("IsAlpha", isalpha);
                Token.categories2.put("IsAlpha", Token.complementRanges(isalpha));
                Token.registerNonXS("IsAlpha");

                Token isalnum = Token.createRange();
                isalnum.mergeRanges(isalpha);   // Lu Ll Lo
                isalnum.mergeRanges(ranges[Character.DECIMAL_DIGIT_NUMBER]); // Nd
                Token.categories.put("IsAlnum", isalnum);
                Token.categories2.put("IsAlnum", Token.complementRanges(isalnum));
                Token.registerNonXS("IsAlnum");

                Token isspace = Token.createRange();
                isspace.mergeRanges(Token.token_spaces);
                isspace.mergeRanges(ranges[CHAR_SEPARATOR]); // Z
                Token.categories.put("IsSpace", isspace);
                Token.categories2.put("IsSpace", Token.complementRanges(isspace));
                Token.registerNonXS("IsSpace");

                Token isword = Token.createRange();
                isword.mergeRanges(isalnum);     // Lu Ll Lo Nd
                isword.addRange('_', '_');
                Token.categories.put("IsWord", isword);
                Token.categories2.put("IsWord", Token.complementRanges(isword));
                Token.registerNonXS("IsWord");

                Token isascii = Token.createRange();
                isascii.addRange(0, 127);
                Token.categories.put("IsASCII", isascii);
                Token.categories2.put("IsASCII", Token.complementRanges(isascii));
                Token.registerNonXS("IsASCII");

                Token isnotgraph = Token.createRange();
                isnotgraph.mergeRanges(ranges[CHAR_OTHER]);
                isnotgraph.addRange(' ', ' ');
                Token.categories.put("IsGraph", Token.complementRanges(isnotgraph));
                Token.categories2.put("IsGraph", isnotgraph);
                Token.registerNonXS("IsGraph");

                Token isxdigit = Token.createRange();
                isxdigit.addRange('0', '9');
                isxdigit.addRange('A', 'F');
                isxdigit.addRange('a', 'f');
                Token.categories.put("IsXDigit", Token.complementRanges(isxdigit));
                Token.categories2.put("IsXDigit", isxdigit);
                Token.registerNonXS("IsXDigit");

                Token.setAlias("IsDigit", "Nd", true);
                Token.setAlias("IsUpper", "Lu", true);
                Token.setAlias("IsLower", "Ll", true);
                Token.setAlias("IsCntrl", "C", true);
                Token.setAlias("IsPrint", "C", false);
                Token.setAlias("IsPunct", "P", true);
                Token.registerNonXS("IsDigit");
                Token.registerNonXS("IsUpper");
                Token.registerNonXS("IsLower");
                Token.registerNonXS("IsCntrl");
                Token.registerNonXS("IsPrint");
                Token.registerNonXS("IsPunct");

                Token.setAlias("alpha", "IsAlpha", true);
                Token.setAlias("alnum", "IsAlnum", true);
                Token.setAlias("ascii", "IsASCII", true);
                Token.setAlias("cntrl", "IsCntrl", true);
                Token.setAlias("digit", "IsDigit", true);
                Token.setAlias("graph", "IsGraph", true);
                Token.setAlias("lower", "IsLower", true);
                Token.setAlias("print", "IsPrint", true);
                Token.setAlias("punct", "IsPunct", true);
                Token.setAlias("space", "IsSpace", true);
                Token.setAlias("upper", "IsUpper", true);
                Token.setAlias("word", "IsWord", true); // Perl extension
                Token.setAlias("xdigit", "IsXDigit", true);
                Token.registerNonXS("alpha");
                Token.registerNonXS("alnum");
                Token.registerNonXS("ascii");
                Token.registerNonXS("cntrl");
                Token.registerNonXS("digit");
                Token.registerNonXS("graph");
                Token.registerNonXS("lower");
                Token.registerNonXS("print");
                Token.registerNonXS("punct");
                Token.registerNonXS("space");
                Token.registerNonXS("upper");
                Token.registerNonXS("word");
                Token.registerNonXS("xdigit");
            } // synchronized
        } // if null
        RangeToken tok = positive ? (RangeToken)Token.categories.get(name)
            : (RangeToken)Token.categories2.get(name);
        //if (tok == null) System.out.println(name);
        return tok;
    }
    static protected RangeToken getRange(String name, boolean positive, boolean xs) {
        RangeToken range = Token.getRange(name, positive);
        if (xs && range != null && Token.isRegisterNonXS(name))
            range = null;
        return range;
    }

    static Hashtable nonxs = null;
    /**
     * This method is called by only getRange().
     * So this method need not MT-safe.
     * <p>
     *  // missing 2 private use手动添加
     * 
     * }; // ADD THOSE MANUALLY // F0000FFFFD; "Private Use",// 10000010FFFD; "私人使用"// FFF0FFFD; "Specials",
     * static final String blockRanges ="\\ u0000 \\ u007F \\ u0080 \\ u00FF \\ u0100 \\ u017F \\ u0180 \\ u
     * 024F \\ u0250 \\ u02AF \\ u02F0 \\ u02FF \\ u0300 \\ u036F"+"\\ u0370 \\ u03FF \\ u0400 \\ u04FF \\ u
     * 0530 \\ u058F \\ u0590 \\ u05FF \\ u0600 \\ u06FF \\ u0700 \\ u074F \\ u0780 \\ u07BF"+"\\ u0900 \\ u
     * 097F \\ u0980 \\ u09FF \\ u0A00 \\ u0A7F \\ u0A80 \\ u0AFF \\ u0B00 \\ u0B7F \\ u0B80 \\ u0BFF \\ u0C
     * 00 \\ u0C7F \\ u0C80 \\ u0CFF"+"\\ u0D00 \\ u0D7F \\ u0D80 \\ u0DFF \\ u0E00 \\ u0E7F \\ u0E80 \\ u0E
     * FF \\ u0F00 \\ u0FFF \\ u1000 \\ u109F \\ u10A0 \\ u10FF \\ u1100 \\ u11FF"+"\\ u1200 \\ u137F \\ u13
     * A0 \\ u13FF \\ u1400 \\ u167F \\ u1680 \\ u169F \\ u16A0 \\ u16FF \\ u1780 \\ u17FF \\ u1800 \\ u18AF
     *  \\ u1E00 \\ u1EFF"+"\\ u1F00 \\ u1FFF \\ u2000 \\ u206F \\ u2070 \\ u209F \\ u20A0 \\ u20CF \\ u20D0
     *  \\ u20FF \\ u2100 \\ u214F \\ u2150 \\ u218F \\ u2190 \\ u21FF \\ u2200 \\ u22FF"+"\\ u2300 \\ u23FF
     *  \\ u2400 \\ u243F \\ u2440 \\ u245F \\ u2460 \\ u24FF \\ u2500 \\ u257F \\ u2580 \\ u259F \\ u25A0 \
     * \ u25FF \\ u2600 \\ u26FF \\ u2700 \\ u27BF"+"\\ u2800 \\ u28FF \\ u2E80 \\ u2EFF \\ u2F00 \\ u2FDF \
     * \ u2FF0 \\ u2FFF \\ u3000 \\ u303F \\ u3040 \\ u309F \\ u30A0 \\ u30FF \\ u3100 \\ u312F \\ u3130 \\ 
     * u318F"+"\\ u3190 \\ u319F \\ u31A0 \\ u31BF \\ u3200 \\ u32FF \\ u3300 \\ u33FF \\ u3400 \\ u4DB5 \\ 
     * u4E00 \\ u9FFF \\ uA000 \\ uA48F \\ uA490 \\ uA4CF"+"\\ uAC00 \\ uD7A3 \\ uE000 \\ uF8FF \\ uF900 \\ 
     * uFAFF \\ uFB00 \\ uFB4F \\ uFB50 \\ uFDEF"+"\\ uFE20 \\ uFE2F \\ uFE30 \\ uFE4F \\ uFE50 \\ uFE6F \\ 
     * uFE70 \\ uFEFE \\ uFEFF \\ uFEFF \\ uFF00 \\ uFFEF"; static final int [] nonBMPBlockRanges = {0x10300,0x1032F,// 84 0x10330,0x1034F,0x10400,0x1044F,0x1D000,0x1D0FF,0x1D100,0x1D1FF,0x1D400,0x1D7FF,0x20000,0x2A6D6,0x2F800,0x2FA1F ,0xE0000,0xE007F}
     * ; private static final int NONBMP_BLOCK_START = 84;}; // ADD THOSE MANUALLY // F0000FFFFD; "Private U
     * se",// 10000010FFFD; "私人使用"// FFF0。
     * 
     * 静态保护RangeToken getRange(String name,boolean positive){if(Tokencategoriessize()== 0){synchronized(Tokencategories){Token [] ranges = new Token [TokencategoryNameslength]; for(int i = 0; i <rangeslength; i ++){ranges [i] = TokencreateRange(); } int type; for(int i = 0; i <0x10000; i ++){type = CharactergetType((char)i); if(type == CharacterSTART_PUNCTUATION || type == Character END_PUNCTUATION){//构建Pi值表if(i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C || i == 0x201F || i == 0x2039){type = CHAR_INIT_QUOTE; } //构建Pf值表if(i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A){type = CHAR_FINAL_QUOTE; }} ranges [type] addRange(i,i); switch(type){case CharacterUPPERCASE_LETTER：case字符LOWERCASE_LETTER：case CharacterTITLECASE_LETTER：case CharacterMODIFIER_LETTER：case CharacterOTHER_LETTER：type = CHAR_LETTER;打破; case CharacterNON_SPACING_MARK：case CharacterCOMBINING_SPACING_MARK：case CharacterENCLOSING_MARK：type = CHAR_MARK;打破; case CharacterDECIMAL_DIGIT_NUMBER：case CharacterLETTER_NUMBER：case CharacterOTHER_NUMBER：type = CHAR_NUMBER;打破; case CharacterSPACE_SEPARATOR：case CharacterLINE_SEPARATOR：case CharacterPARAGRAPH_SEPARATOR：type = CHAR_SEPARATOR;打破; case CharacterCONTROL：case CharacterFORMAT：case CharacterSURROGATE：case CharacterPRIVATE_USE：case CharacterUNASSIGNED：type = CHAR_OTHER;打破; case CharacterCONNECTOR_PUNCTUATION：case CharacterDASH_PUNCTUATION：case字符START_PUNCTUATION：case字符END_PUNCTUATION：case CHAR_INIT_QUOTE：case CHAR_FINAL_QUOTE：case CharacterOTHER_PUNCTUATION：type = CHAR_PUNCTUATION;打破; case CharacterMATH_SYMBOL：case CharacterCURRENCY_SYMBOL：case CharacterMODIFIER_SYMBOL：case CharacterOTHER_SYMBOL：type = CHAR_SYMBOL;打破; default：throw new RuntimeException("orgapachexercesutilsregexToken#getRange()：Unknown Unicode category："+ type); } ranges [type] addRange(i,i); } // for all characters ranges [CharacterUNASSIGNED] addRange(0x10000,TokenUTF16_MAX);。
     * 
     * if(TokencategoryNames [i]！= null){if(i == CharacterUNASSIGNED){//未分配的区域[i] addRange(0x10000,TokenUTF16_MAX); }
     *  Tokencategoriesput(TokencategoryNames [i],ranges [i]); Tokencategories2put(TokencategoryNames [i],To
     * kencomplementRanges(ranges [i])); }} // REVISIT：我们真的需要支持块名称在Unicode 31 //或者我们可以创建所有的名称以IsBLOCKNAME格式(
     * XML Schema REC)? // StringBuffer buffer = new StringBuffer(50); for(int i = 0; i <TokenblockNameslength; i ++){Token r1 = TokencreateRange(); int location; if(i <NONBMP_BLOCK_START){location = i * 2; int rstart = TokenblockRangescharAt(location); int rend = TokenblockRangescharAt(location + 1); // DEBUGING // Systemoutprintln(n +""+ IntegertoHexString(rstart)// +" - "+ IntegertoHexString(rend)); r1addRange(rstart,rend); } else {location =(i  -  NONBMP_BLOCK_START)* 2; r1addRange(TokenonBMPBlockRanges [location],TokennonBMPBlockRanges [location + 1]); } String n = TokenblockNames [i]; if(nequals("Specials"))r1addRange(0xfff0,0xfffd); if(nequals("Private Use")){r1addRange(0xF0000,0xFFFFD); r1addRange(0x100000,0x10FFFD); } Tokencategoriesput(n,r1); Tokencategories2put(n,TokencomplementRanges(r1)); buffersetLength(0); bufferappend("Is");如果(nindexOf('')>
     *  = 0){for(int ci = 0; ci <nlength(); ci ++)if(ncharAt(ci)！='')bufferappend((char)ncharAt(ci)) ; } else {bufferappend(n); } TokensetAlias(buffertoString(),n,true); }}。
     * 
     * // TR#18 12 TokensetAlias("ASSIGNED","Cn",false); TokensetAlias("UNASSIGNED","Cn",true); Token all = 
     * TokencreateRange(); alladdRange(0,TokenUTF16_MAX); Tokencategoriesput("ALL",all); Tokencategories2put
     * ("ALL",TokencomplementRanges(all)); TokenregisterNonXS("ASSIGNED"); TokenregisterNonXS("UNASSIGNED");
     *  TokenregisterNonXS("ALL");。
     * 
     *  Token isalpha = TokencreateRange(); isalphamergeRanges(ranges [CharacterUPPERCASE_LETTER]); // Lu is
     * alphamergeRanges(ranges [CharacterLOWERCASE_LETTER]); // Ll isalphamergeRanges(ranges [CharacterOTHER
     * _LETTER]); // Lo Tokencategoriesput("IsAlpha",isalpha); Tokencategories2put("IsAlpha",Tokencomplement
     * Ranges(isalpha)); TokenregisterNonXS("IsAlpha");。
     * 
     * Token isalnum = TokencreateRange(); isalnummergeRanges(isalpha); // Lu Ll Lo isalnummergeRanges(range
     * s [CharacterDECIMAL_DIGIT_NUMBER]); // Nd Tokencategoriesput("IsAlnum",isalnum); Tokencategories2put(
     * "IsAlnum",TokencomplementRanges(isalnum)); TokenregisterNonXS("IsAlnum");。
     * 
     *  Token isspace = TokencreateRange(); isspacemergeRanges(Tokentoken_spaces); isspacemergeRanges(ranges
     *  [CHAR_SEPARATOR]); // Z Tokencategoriesput("IsSpace",isspace); Tokencategories2put("IsSpace",Tokenco
     * mplementRanges(isspace)); TokenregisterNonXS("IsSpace");。
     * 
     * Token isword = TokencreateRange(); iswordmergeRanges(isalnum); // Lu Ll Lo Nd iswordaddRange('_','_')
     * ; Tokencategoriesput("IsWord",isword); Tokencategories2put("IsWord",TokencomplementRanges(isword)); T
     * okenregisterNonXS("IsWord");。
     * 
     *  Token isascii = TokencreateRange(); isasciiaddRange(0,127); Tokencategoriesput("IsASCII",isascii); T
     * okencategories2put("IsASCII",TokencomplementRanges(isascii)); TokenregisterNonXS("IsASCII");。
     * 
     *  Token isnotgraph = TokencreateRange(); isnotgraphmergeRanges(ranges [CHAR_OTHER]); isnotgraphaddRang
     */
    static protected void registerNonXS(String name) {
        if (Token.nonxs == null)
            Token.nonxs = new Hashtable();
        Token.nonxs.put(name, name);
    }
    static protected boolean isRegisterNonXS(String name) {
        if (Token.nonxs == null)
            return false;
        //DEBUG
        //System.err.println("isRegisterNonXS: "+name);
        return Token.nonxs.containsKey(name);
    }

    private static void setAlias(String newName, String name, boolean positive) {
        Token t1 = (Token)Token.categories.get(name);
        Token t2 = (Token)Token.categories2.get(name);
        if (positive) {
            Token.categories.put(newName, t1);
            Token.categories2.put(newName, t2);
        } else {
            Token.categories2.put(newName, t1);
            Token.categories.put(newName, t2);
        }
    }

    // ------------------------------------------------------

    static final String viramaString =
    "\u094D"// ;DEVANAGARI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u09CD"//;BENGALI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u0A4D"//;GURMUKHI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u0ACD"//;GUJARATI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u0B4D"//;ORIYA SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u0BCD"//;TAMIL SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u0C4D"//;TELUGU SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u0CCD"//;KANNADA SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u0D4D"//;MALAYALAM SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
    +"\u0E3A"//;THAI CHARACTER PHINTHU;Mn;9;ON;;;;;N;THAI VOWEL SIGN PHINTHU;;;;
    +"\u0F84";//;TIBETAN MARK HALANTA;Mn;9;ON;;;;;N;TIBETAN VIRAMA;;;;

    static private Token token_grapheme = null;
    static synchronized Token getGraphemePattern() {
        if (Token.token_grapheme != null)
            return Token.token_grapheme;

        Token base_char = Token.createRange();  // [{ASSIGNED}]-[{M},{C}]
        base_char.mergeRanges(Token.getRange("ASSIGNED", true));
        base_char.subtractRanges(Token.getRange("M", true));
        base_char.subtractRanges(Token.getRange("C", true));

        Token virama = Token.createRange();
        for (int i = 0;  i < Token.viramaString.length(); i++) {
            virama.addRange(i, i);
        }

        Token combiner_wo_virama = Token.createRange();
        combiner_wo_virama.mergeRanges(Token.getRange("M", true));
        combiner_wo_virama.addRange(0x1160, 0x11ff); // hangul_medial and hangul_final
        combiner_wo_virama.addRange(0xff9e, 0xff9f); // extras

        Token left = Token.createUnion();       // base_char?
        left.addChild(base_char);
        left.addChild(Token.token_empty);

        Token foo = Token.createUnion();
        foo.addChild(Token.createConcat(virama, Token.getRange("L", true)));
        foo.addChild(combiner_wo_virama);

        foo = Token.createClosure(foo);

        foo = Token.createConcat(left, foo);

        Token.token_grapheme = foo;
        return Token.token_grapheme;
    }

    /**
     * Combing Character Sequence in Perl 5.6.
     * <p>
     * e('',''); Tokencategoriesput("IsGraph",TokencomplementRanges(isnotgraph)); Tokencategories2put("IsGra
     * ph",isnotgraph); TokenregisterNonXS("IsGraph");。
     * 
     * Token isxdigit = TokencreateRange(); isxdigitaddRange('0','9'); isxdigitaddRange('A','F'); isxdigitad
     * dRange('a','f'); Tokencategoriesput("IsXDigit",TokencomplementRanges(isxdigit)); Tokencategories2put(
     * "IsXDigit",isxdigit); TokenregisterNonXS("IsXDigit");。
     * 
     *  TokensetAlias("IsDigit","Nd",true); TokensetAlias("IsUpper","Lu",true); TokensetAlias("IsLower","L1"
     * ,true); TokensetAlias("IsCntrl","C",true); TokensetAlias("IsPrint","C",false); TokensetAlias("IsPunct
     * ","P",true); TokenregisterNonXS("IsDigit"); TokenregisterNonXS("IsUpper"); TokenregisterNonXS("IsLowe
     * r"); TokenregisterNonXS("IsCntrl"); TokenregisterNonXS("IsPrint"); TokenregisterNonXS("IsPunct");。
     * 
     * TokensetAlias("alpha","IsAlpha",true); TokensetAlias("alnum","IsAlnum",true); TokensetAlias("ascii","
     * IsASCII",true); TokensetAlias("cntrl","IsCntrl",true); TokensetAlias("digit","IsDigit",true); Tokense
     * tAlias("graph","IsGraph",true); TokensetAlias("lower","IsLower",true); TokensetAlias("print","IsPrint
     * ",true); TokensetAlias("punct","IsPunct",true); TokensetAlias("space","IsSpace",true); TokensetAlias(
     * "upper","IsUpper",true); TokensetAlias("word","IsWord",true); // Perl extension TokensetAlias("xdigit
     * ","IsXDigit",true); TokenregisterNonXS("alpha"); TokenregisterNonXS("alnum"); TokenregisterNonXS("asc
     * ii"); TokenregisterNonXS("cntrl"); TokenregisterNonXS("digit"); TokenregisterNonXS("graph"); Tokenreg
     * isterNonXS("lower"); TokenregisterNonXS("print");令牌registerNonXS("punct"); TokenregisterNonXS("space"
     * ); TokenregisterNonXS("upper"); TokenregisterNonXS("word"); TokenregisterNonXS("xdigit"); } // synchr
     * onized} // if null RangeToken tok = positive? (RangeToken)Tokencategoriesget(name)：(RangeToken)Tokenc
     * ategories2get(name); // if(tok == null)Systemoutprintln(name);回报; } static protected RangeToken getRa
     * nge(String name,boolean positive,boolean xs){RangeToken range = TokengetRange(name,positive); if(xs && range！= null && TokenisRegisterNonXS(name))range = null;返回范围; }
     * }。
     * 
     * static Hashtable nonxs = null; / **此方法仅由getRange()调用所以此方法不需要MT安全
     * 
     */
    static private Token token_ccs = null;
    static synchronized Token getCombiningCharacterSequence() {
        if (Token.token_ccs != null)
            return Token.token_ccs;

        Token foo = Token.createClosure(Token.getRange("M", true)); // \pM*
        foo = Token.createConcat(Token.getRange("M", false), foo); // \PM + \pM*
        Token.token_ccs = foo;
        return Token.token_ccs;
    }

    // ------------------------------------------------------

    // ------------------------------------------------------
    /**
     * This class represents a node in parse tree.
     * <p>
     */
    static class StringToken extends Token implements java.io.Serializable {

        private static final long serialVersionUID = -4614366944218504172L;

        String string;
        final int refNumber;

        StringToken(int type, String str, int n) {
            super(type);
            this.string = str;
            this.refNumber = n;
        }

        int getReferenceNumber() {              // for STRING
            return this.refNumber;
        }
        String getString() {                    // for STRING
            return this.string;
        }

        public String toString(int options) {
            if (this.type == BACKREFERENCE)
                return "\\"+this.refNumber;
            else
                return REUtil.quoteMeta(this.string);
        }
    }

    /**
     * This class represents a node in parse tree.
     * <p>
     *  组合字符序列在Perl 56
     * 
     */
    static class ConcatToken extends Token implements java.io.Serializable {

        private static final long serialVersionUID = 8717321425541346381L;

        final Token child;
        final Token child2;

        ConcatToken(Token t1, Token t2) {
            super(Token.CONCAT);
            this.child = t1;
            this.child2 = t2;
        }

        int size() {
            return 2;
        }
        Token getChild(int index) {
            return index == 0 ? this.child : this.child2;
        }

        public String toString(int options) {
            String ret;
            if (this.child2.type == CLOSURE && this.child2.getChild(0) == this.child) {
                ret = this.child.toString(options)+"+";
            } else if (this.child2.type == NONGREEDYCLOSURE && this.child2.getChild(0) == this.child) {
                ret = this.child.toString(options)+"+?";
            } else
                ret = this.child.toString(options)+this.child2.toString(options);
            return ret;
        }
    }

    /**
     * This class represents a node in parse tree.
     * <p>
     *  这个类表示解析树中的一个节点
     * 
     */
    static class CharToken extends Token implements java.io.Serializable {

        private static final long serialVersionUID = -4394272816279496989L;

        final int chardata;

        CharToken(int type, int ch) {
            super(type);
            this.chardata = ch;
        }

        int getChar() {
            return this.chardata;
        }

        public String toString(int options) {
            String ret;
            switch (this.type) {
              case CHAR:
                switch (this.chardata) {
                  case '|':  case '*':  case '+':  case '?':
                  case '(':  case ')':  case '.':  case '[':
                  case '{':  case '\\':
                    ret = "\\"+(char)this.chardata;
                    break;
                  case '\f':  ret = "\\f";  break;
                  case '\n':  ret = "\\n";  break;
                  case '\r':  ret = "\\r";  break;
                  case '\t':  ret = "\\t";  break;
                  case 0x1b:  ret = "\\e";  break;
                    //case 0x0b:  ret = "\\v";  break;
                  default:
                    if (this.chardata >= 0x10000) {
                        String pre = "0"+Integer.toHexString(this.chardata);
                        ret = "\\v"+pre.substring(pre.length()-6, pre.length());
                    } else
                        ret = ""+(char)this.chardata;
                }
                break;

              case ANCHOR:
                if (this == Token.token_linebeginning || this == Token.token_lineend)
                    ret = ""+(char)this.chardata;
                else
                    ret = "\\"+(char)this.chardata;
                break;

              default:
                ret = null;
            }
            return ret;
        }

        boolean match(int ch) {
            if (this.type == CHAR) {
                return ch == this.chardata;
            } else
                throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
        }
    }

    /**
     * This class represents a node in parse tree.
     * <p>
     *  这个类表示解析树中的一个节点
     * 
     */
    static class ClosureToken extends Token implements java.io.Serializable {

        private static final long serialVersionUID = 1308971930673997452L;

        int min;
        int max;
        final Token child;

        ClosureToken(int type, Token tok) {
            super(type);
            this.child = tok;
            this.setMin(-1);
            this.setMax(-1);
        }

        int size() {
            return 1;
        }
        Token getChild(int index) {
            return this.child;
        }

        final void setMin(int min) {
            this.min = min;
        }
        final void setMax(int max) {
            this.max = max;
        }
        final int getMin() {
            return this.min;
        }
        final int getMax() {
            return this.max;
        }

        public String toString(int options) {
            String ret;
            if (this.type == CLOSURE) {
                if (this.getMin() < 0 && this.getMax() < 0) {
                    ret = this.child.toString(options)+"*";
                } else if (this.getMin() == this.getMax()) {
                    ret = this.child.toString(options)+"{"+this.getMin()+"}";
                } else if (this.getMin() >= 0 && this.getMax() >= 0) {
                    ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}";
                } else if (this.getMin() >= 0 && this.getMax() < 0) {
                    ret = this.child.toString(options)+"{"+this.getMin()+",}";
                } else
                    throw new RuntimeException("Token#toString(): CLOSURE "
                                               +this.getMin()+", "+this.getMax());
            } else {
                if (this.getMin() < 0 && this.getMax() < 0) {
                    ret = this.child.toString(options)+"*?";
                } else if (this.getMin() == this.getMax()) {
                    ret = this.child.toString(options)+"{"+this.getMin()+"}?";
                } else if (this.getMin() >= 0 && this.getMax() >= 0) {
                    ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}?";
                } else if (this.getMin() >= 0 && this.getMax() < 0) {
                    ret = this.child.toString(options)+"{"+this.getMin()+",}?";
                } else
                    throw new RuntimeException("Token#toString(): NONGREEDYCLOSURE "
                                               +this.getMin()+", "+this.getMax());
            }
            return ret;
        }
    }

    /**
     * This class represents a node in parse tree.
     * <p>
     *  这个类表示解析树中的一个节点
     * 
     */
    static class ParenToken extends Token implements java.io.Serializable {

        private static final long serialVersionUID = -5938014719827987704L;

        final Token child;
        final int parennumber;

        ParenToken(int type, Token tok, int paren) {
            super(type);
            this.child = tok;
            this.parennumber = paren;
        }

        int size() {
            return 1;
        }
        Token getChild(int index) {
            return this.child;
        }

        int getParenNumber() {
            return this.parennumber;
        }

        public String toString(int options) {
            String ret = null;
            switch (this.type) {
              case PAREN:
                if (this.parennumber == 0) {
                    ret = "(?:"+this.child.toString(options)+")";
                } else {
                    ret = "("+this.child.toString(options)+")";
                }
                break;

              case LOOKAHEAD:
                ret = "(?="+this.child.toString(options)+")";
                break;
              case NEGATIVELOOKAHEAD:
                ret = "(?!"+this.child.toString(options)+")";
                break;
              case LOOKBEHIND:
                ret = "(?<="+this.child.toString(options)+")";
                break;
              case NEGATIVELOOKBEHIND:
                ret = "(?<!"+this.child.toString(options)+")";
                break;
              case INDEPENDENT:
                ret = "(?>"+this.child.toString(options)+")";
                break;
            }
            return ret;
        }
    }

    /**
     * (?(condition)yes-pattern|no-pattern)
     * <p>
     *  这个类表示解析树中的一个节点
     * 
     */
    static class ConditionToken extends Token implements java.io.Serializable {

        private static final long serialVersionUID = 4353765277910594411L;

        final int refNumber;
        final Token condition;
        final Token yes;
        final Token no;
        ConditionToken(int refno, Token cond, Token yespat, Token nopat) {
            super(Token.CONDITION);
            this.refNumber = refno;
            this.condition = cond;
            this.yes = yespat;
            this.no = nopat;
        }
        int size() {
            return this.no == null ? 1 : 2;
        }
        Token getChild(int index) {
            if (index == 0)  return this.yes;
            if (index == 1)  return this.no;
            throw new RuntimeException("Internal Error: "+index);
        }

        public String toString(int options) {
            String ret;
            if (refNumber > 0) {
                ret = "(?("+refNumber+")";
            } else if (this.condition.type == Token.ANCHOR) {
                ret = "(?("+this.condition+")";
            } else {
                ret = "(?"+this.condition;
            }

            if (this.no == null) {
                ret += this.yes+")";
            } else {
                ret += this.yes+"|"+this.no+")";
            }
            return ret;
        }
    }

    /**
     * (ims-ims: .... )
     * <p>
     *  这个类表示解析树中的一个节点
     * 
     */
    static class ModifierToken extends Token implements java.io.Serializable {

        private static final long serialVersionUID = -9114536559696480356L;

        final Token child;
        final int add;
        final int mask;

        ModifierToken(Token tok, int add, int mask) {
            super(Token.MODIFIERGROUP);
            this.child = tok;
            this.add = add;
            this.mask = mask;
        }

        int size() {
            return 1;
        }
        Token getChild(int index) {
            return this.child;
        }

        int getOptions() {
            return this.add;
        }
        int getOptionsMask() {
            return this.mask;
        }

        public String toString(int options) {
            return "(?"
                +(this.add == 0 ? "" : REUtil.createOptionString(this.add))
                +(this.mask == 0 ? "" : REUtil.createOptionString(this.mask))
                +":"
                +this.child.toString(options)
                +")";
        }
    }

    /**
     * This class represents a node in parse tree.
     * for UNION or CONCAT.
     * <p>
     *  (?(条件)是模式|无模式)
     * 
     */
    static class UnionToken extends Token implements java.io.Serializable {

        private static final long serialVersionUID = -2568843945989489861L;

        Vector children;

        UnionToken(int type) {
            super(type);
        }

        void addChild(Token tok) {
            if (tok == null)  return;
            if (this.children == null)  this.children = new Vector();
            if (this.type == UNION) {
                this.children.addElement(tok);
                return;
            }
                                                // This is CONCAT, and new child is CONCAT.
            if (tok.type == CONCAT) {
                for (int i = 0;  i < tok.size();  i ++)
                    this.addChild(tok.getChild(i)); // Recursion
                return;
            }
            int size = this.children.size();
            if (size == 0) {
                this.children.addElement(tok);
                return;
            }
            Token previous = (Token)this.children.elementAt(size-1);
            if (!((previous.type == CHAR || previous.type == STRING)
                  && (tok.type == CHAR || tok.type == STRING))) {
                this.children.addElement(tok);
                return;
            }

            //System.err.println("Merge '"+previous+"' and '"+tok+"'.");

            StringBuffer buffer;
            int nextMaxLength = (tok.type == CHAR ? 2 : tok.getString().length());
            if (previous.type == CHAR) {        // Replace previous token by STRING
                buffer = new StringBuffer(2 + nextMaxLength);
                int ch = previous.getChar();
                if (ch >= 0x10000)
                    buffer.append(REUtil.decomposeToSurrogates(ch));
                else
                    buffer.append((char)ch);
                previous = Token.createString(null);
                this.children.setElementAt(previous, size-1);
            } else {                            // STRING
                buffer = new StringBuffer(previous.getString().length() + nextMaxLength);
                buffer.append(previous.getString());
            }

            if (tok.type == CHAR) {
                int ch = tok.getChar();
                if (ch >= 0x10000)
                    buffer.append(REUtil.decomposeToSurrogates(ch));
                else
                    buffer.append((char)ch);
            } else {
                buffer.append(tok.getString());
            }

            ((StringToken)previous).string = new String(buffer);
        }

        int size() {
            return this.children == null ? 0 : this.children.size();
        }
        Token getChild(int index) {
            return (Token)this.children.elementAt(index);
        }

        public String toString(int options) {
            String ret;
            if (this.type == CONCAT) {
                if (this.children.size() == 2) {
                    Token ch = this.getChild(0);
                    Token ch2 = this.getChild(1);
                    if (ch2.type == CLOSURE && ch2.getChild(0) == ch) {
                        ret = ch.toString(options)+"+";
                    } else if (ch2.type == NONGREEDYCLOSURE && ch2.getChild(0) == ch) {
                        ret = ch.toString(options)+"+?";
                    } else
                        ret = ch.toString(options)+ch2.toString(options);
                } else {
                    StringBuffer sb = new StringBuffer();
                    for (int i = 0;  i < this.children.size();  i ++) {
                        sb.append(((Token)this.children.elementAt(i)).toString(options));
                    }
                    ret = new String(sb);
                }
                return ret;
            }
            if (this.children.size() == 2 && this.getChild(1).type == EMPTY) {
                ret = this.getChild(0).toString(options)+"?";
            } else if (this.children.size() == 2
                       && this.getChild(0).type == EMPTY) {
                ret = this.getChild(1).toString(options)+"??";
            } else {
                StringBuffer sb = new StringBuffer();
                sb.append(((Token)this.children.elementAt(0)).toString(options));
                for (int i = 1;  i < this.children.size();  i ++) {
                    sb.append((char)'|');
                    sb.append(((Token)this.children.elementAt(i)).toString(options));
                }
                ret = new String(sb);
            }
            return ret;
        }
    }
}
